pollution_gather_aqi %>%
group_by(pollutants,year) %>%
summarize(mean_aqi = mean(aqi, na.rm = TRUE)) %>%
ggplot(aes(x = year, y = mean_aqi, color = pollutants)) +
geom_point() + geom_line() +
theme(legend.position = "bottom")
From the point and line plot given above, we can see that overall the
pollution_aqi %>%
mutate(year = fct_reorder(year, no2_aqi)) %>%
plot_ly(y = ~no2_aqi, color = ~year, type = "box",
colors = "Set2")
pollution_aqi %>%
mutate(year = fct_reorder(year, o3_aqi)) %>%
plot_ly(y = ~o3_aqi, color = ~year, type = "box",
colors = "Set2")
pollution_aqi %>%
mutate(year = fct_reorder(year, so2_aqi)) %>%
plot_ly(y = ~so2_aqi, color = ~year, type = "box",
colors = "Set2")
pollution_aqi %>%
mutate(year = fct_reorder(year, co_aqi)) %>%
plot_ly(y = ~co_aqi, color = ~year, type = "box",
colors = "Set2")
## Warning: Ignoring 45 observations
From the box plots given above, we can further categorize our data into different level of aqi given each pollutants We have searched the cut off point for each pollutants
pollution_aqi_cat <- pollution_aqi %>%
mutate(., o3_aqi_cat = ifelse(o3_aqi %in% 0:54, "good", ifelse(o3_aqi %in% 55:70, "moderate", ifelse(o3_aqi %in% 71:85, "unhealthy for senstitive", ifelse(o3_aqi %in% 86:105, "unhealth", "very unhealthy")))),
co_aqi_cat = ifelse(co_aqi %in% 0:4.4, "good", ifelse(co_aqi %in% 4.5:9.4, "moderate", ifelse(co_aqi %in% 9.5:12.4, "unhealthy for senstitive", ifelse(co_aqi %in% 12.5:15.4, "unhealth", ifelse(co_aqi %in% 15.5:30.4, "very unhealth", "hazardous"))))),
so2_aqi_cat = ifelse(so2_aqi %in% 0:35, "good", ifelse(so2_aqi %in% 36:75, "moderate", ifelse(so2_aqi %in% 76:185, "unhealthy for senstitive", ifelse(so2_aqi %in% 186:304, "unhealth", ifelse(so2_aqi %in% 305:604, "very unhealth", "hazardous"))))),
no2_aqi_cat = ifelse(no2_aqi %in% 0:53, "good", ifelse(no2_aqi %in% 54:100, "moderate", ifelse(no2_aqi %in% 101:360, "unhealthy for senstitive", ifelse(no2_aqi %in% 361:649, "unhealth", ifelse(no2_aqi %in% 650:1249, "very unhealth", "hazardous"))))),
) %>%
select(-no2_aqi, -co_aqi, -so2_aqi, -o3_aqi)
pollution_aqi_cat <- pollution_aqi_cat %>%
gather(key = pollutants, value = aqi, o3_aqi_cat:no2_aqi_cat)
pollution_aqi_cat %>%
group_by(year, aqi) %>%
summarize(n = n()) %>%
spread(key = aqi, value = n)
## # A tibble: 7 x 8
## # Groups: year [7]
## year good hazardous moderate unhealth `unhealthy for senstitive`
## * <chr> <int> <int> <int> <int> <int>
## 1 10 36967 16180 2513 380 650
## 2 11 47392 23246 4470 451 954
## 3 12 55143 21028 2347 399 620
## 4 13 64538 15412 2070 320 623
## 5 14 72417 6585 1065 182 438
## 6 15 68940 7083 1070 257 487
## 7 16 14701 1570 119 7 29
## # ... with 2 more variables: `very unhealth` <int>, `very unhealthy` <int>
#overall aqi bar plot
pollution_aqi_cat %>%
mutate(year = fct_infreq(year)) %>%
ggplot(aes(x = year, fill = aqi)) + geom_bar()
#o3_aqi bar plot
pollution_aqi_cat %>%
filter(pollutants == "o3_aqi_cat") %>%
ggplot(aes(x = year, fill = aqi)) + geom_bar()
#co
pollution_aqi_cat %>%
filter(pollutants == "co_aqi_cat") %>%
ggplot(aes(x = year, fill = aqi)) + geom_bar()
#no2
pollution_aqi_cat %>%
filter(pollutants == "no2_aqi_cat") %>%
ggplot(aes(x = year, fill = aqi)) + geom_bar()
#so2
pollution_aqi_cat %>%
filter(pollutants == "so2_aqi_cat") %>%
ggplot(aes(x = year, fill = aqi)) + geom_bar()